# Clean Environment -----------------------------------------------------
# Removes every (non-hidden) object that ls() reports in the current
# environment before the pipeline starts.
# NOTE(review): this clears objects only; attached packages and options()
# are untouched, and anything a calling script defined beforehand is wiped.
# Running the script in a fresh R session is the safer way to get a clean
# state - confirm nothing depends on this line before removing it.
rm(list = ls())

# Load Data -------------------------------------------------------------
# Reads two .rds files and two Excel lookup sheets, all addressed relative
# to the working directory.
# NOTE(review): no library() call is visible in this file; read_excel() and
# %>% are presumably attached before this script runs - confirm.
df_cjoint <- readRDS(file = "cjoint_data.rds")
df_main_raw <- readRDS(file = "survey_data_subset.rds")
rename_uod <- read_excel(path = "uod_rename.xlsx")
# Converted to a base data.frame; it is later indexed cell by cell as
# rename_likert[i, 1] / rename_likert[i, 2].
rename_likert <- read_excel(path = "likert_rename.xlsx") %>%
  as.data.frame()

# Recode CJOINT ---------------------------------------------------------
# Builds the recoded conjoint table: age bands, English party labels,
# attribute factors with a fixed level order, one 0/1 indicator per
# judges/media level, and three candidate-profile flags. The final across()
# turns every column whose name starts with "attr" into a factor, which
# includes the age band and the indicators created above it.
df_cjoint_recoded <- df_cjoint %>%
  mutate(
    # case_when() uses the first matching branch, so each upper bound
    # implicitly carries the previous lower bound. Missing ages stay NA.
    attr_age_fac = case_when(
      attr_age < 50 ~ "40-49",
      attr_age < 60 ~ "50-59",
      attr_age >= 60 ~ "60-65"
    ),
    # Raw party codes -> English labels in one step; any code that is not
    # listed in `levels` becomes NA.
    attr_party = factor(
      attr_party,
      levels = c("PiS", "KO", "Polska 2050", "Konfederacja", "Lewica"),
      labels = c(
        "Law and Justice (PiS)", "Civic Coalition", "Poland 2050",
        "Confederation", "The Left"
      )
    ),
    attr_media = factor(
      attr_media,
      levels = c("media_L", "media_M", "media_A")
    ),
    attr_judges = factor(
      attr_judges,
      levels = c("judges_L", "judges_M", "judges_A")
    ),
    # 1 when the attribute takes the given level, 0 otherwise, NA if the
    # attribute itself is NA.
    attr_judges_L = as.numeric(attr_judges == "judges_L"),
    attr_judges_M = as.numeric(attr_judges == "judges_M"),
    attr_judges_A = as.numeric(attr_judges == "judges_A"),
    attr_media_L = as.numeric(attr_media == "media_L"),
    attr_media_M = as.numeric(attr_media == "media_M"),
    attr_media_A = as.numeric(attr_media == "media_A"),
    # A profile gets the positive label only when BOTH attributes take the
    # same position; an undetermined (NA) comparison falls into the "non-"
    # category.
    cand_liberal = if_else(
      attr_judges == "judges_L" & attr_media == "media_L",
      "liberal", "non-liberal",
      missing = "non-liberal"
    ),
    cand_majoritarian = if_else(
      attr_judges == "judges_M" & attr_media == "media_M",
      "majoritarian", "non-majoritarian",
      missing = "non-majoritarian"
    ),
    cand_authoritarian = if_else(
      attr_judges == "judges_A" & attr_media == "media_A",
      "authoritarian", "non-authoritarian",
      missing = "non-authoritarian"
    ),
    # Levels "1" ... "12" in numeric (not alphabetical) order.
    choice_factor = factor(choice, levels = as.character(1:12)),
    across(starts_with("attr"), as.factor)
  )

# Recode Covariates ------------------------------------------------------

# Reference values for the speeder cut-off used further down
# (duration_mean - 2 * duration_sd).
# `duration` is coerced with as.numeric() here because the recode below
# applies the same coercion before comparing against the cut-off; computing
# the statistics on the raw column would yield NA if it is stored as text.
# na.rm = TRUE keeps a single missing duration from turning the cut-off into
# NA, which would make every speeder flag NA and silently drop all rows in
# the later `speeder != 1` filter.
duration_mean <- mean(as.numeric(df_main_raw$duration), na.rm = TRUE)
duration_sd <- sd(as.numeric(df_main_raw$duration), na.rm = TRUE)

# Respondent-level covariates: pick the survey columns, give them readable
# names, then derive party preference, age, attention-check flags, the
# speeder flag and numeric benchmark ratings.
df_covariates <- df_main_raw %>%
  dplyr::select(
    ResponseId, birthyear, age_group, Q103, Q101A_1:Q101A_4, duration,
    gender, education_level2_pl_rc, income_PL_rc, financial_situation_rc,
    Q102_1, Q102_2, Q102_3, Q102_15, Q102_16, Q102_18, Q102_5, Q102_7, Q102_8,
    Q1_4, Q2_4, Q4_4, contains("Q117_")
  ) %>%
  rename(
    # Party preference and socio-demographics
    party_preference = Q103,
    education = education_level2_pl_rc,
    income = income_PL_rc,
    financial_situation = financial_situation_rc,
    # Attention-check items
    check_1_1 = Q1_4,
    check_1_2 = Q2_4,
    check_1_3 = Q4_4,
    check_2a = Q101A_1,
    check_2b = Q101A_2,
    check_2c = Q101A_3,
    check_2d = Q101A_4,
    # Benchmark ratings (judges / media x lib / maj / auth)
    bench_judges_lib = Q117_1,
    bench_judges_maj = Q117_2,
    bench_judges_auth = Q117_3,
    bench_media_lib = Q117_4,
    bench_media_maj = Q117_5,
    bench_media_auth = Q117_6,
    # Likert items feeding the L / M / A scales
    L1 = Q102_1,
    L2 = Q102_2,
    L3 = Q102_3,
    M1 = Q102_15,
    M2 = Q102_16,
    M3 = Q102_18,
    A1 = Q102_5,
    A2 = Q102_7,
    A3 = Q102_8
  ) %>%
  mutate(
    # Collapse the two non-partisan answers into a single category.
    party_preference = case_when(
      party_preference %in% c(
        "Don't know", "Would not support any of these"
      ) ~ "Don't know/none",
      TRUE ~ party_preference
    ),
    party_preference = factor(
      party_preference,
      levels = c(
        "Law and Justice (PiS)", "Civic Coalition", "Poland 2050",
        "Confederation", "The Left", "Don't know/none"
      )
    ),
    # Same variable restricted to the five parties; "Don't know/none" is not
    # among the levels and therefore becomes NA.
    party_preference_attr_corr = factor(
      party_preference,
      levels = c(
        "Law and Justice (PiS)", "Civic Coalition", "Poland 2050",
        "Confederation", "The Left"
      )
    ),
    age = 2021 - as.numeric(birthyear),
    duration = as.numeric(duration),
    # NOTE(review): only the literal string "NA" is recoded here; a true
    # missing value stays NA - confirm this matches how the raw file
    # encodes non-response.
    income = case_when(
      income == "NA" ~ "No response",
      TRUE ~ income
    ),
    income = factor(
      income,
      levels = c(
        "<2000 zł", "2001-3000 zł", "3001-4000 zł", "4001-6000 zł",
        ">6000 zł", "No response"
      )
    ),
    education = case_when(
      education == "NA" ~ "No response",
      TRUE ~ education
    ),
    education = factor(
      education,
      levels = c(
        "Primary/vocational", "Secondary/Post-secondary", "Higher",
        "No response"
      )
    ),
    financial_situation = factor(
      financial_situation,
      levels = c("Very poor/poor", "Fair", "Good/very good")
    ),
    # 1 when completion time is more than two SDs below the mean; NA when
    # duration (or the cut-off) is NA.
    speeder = as.numeric(duration < (duration_mean - 2 * duration_sd)),
    # A check counts as passed when any of its items holds the expected
    # answer; an undetermined comparison counts as a fail.
    check_1_pass = if_else(
      check_1_1 == "Radio" | check_1_2 == "Radio" | check_1_3 == "Radio",
      1, 0,
      missing = 0
    ),
    check_2_pass = if_else(
      check_2a == "Slightly important" |
        check_2b == "Slightly important" |
        check_2c == "Slightly important" |
        check_2d == "Slightly important",
      1, 0,
      missing = 0
    ),
    check_all_pass = if_else(
      check_1_pass == 1 & check_2_pass == 1,
      1, 0,
      missing = 0
    )
  ) %>%
  # Benchmark answers -> 1..7 by position in the response scale; anything
  # that is not one of the seven labels becomes NA.
  mutate(across(
    starts_with("bench_"),
    ~ as.numeric(match(.x, c(
      "Not at all democratic",
      "Not democratic",
      "Slightly not democratic",
      "Neither not democratic nor democratic",
      "Slightly democratic",
      "Democratic",
      "Extremely democratic"
    )))
  ))

# UOD Factor Scores ------------------------------------------------------
# Item-level input for the measurement model: respondents who passed at
# least one attention check and are not flagged as speeders. filter() also
# drops rows whose speeder flag is NA.
df_uod <- df_covariates %>%
  filter(check_1_pass == 1 | check_2_pass == 1) %>%
  filter(speeder != 1) %>%
  dplyr::select(
    ResponseId,
    L1, L2, L3,
    M1, M2, M3,
    A1, A2, A3
  )

# Rename levels to numeric
# Each row of rename_likert holds a text label (column 1) and its
# replacement (column 2). Only the nine Likert items are touched, so a
# ResponseId can never be overwritten by an accidental label match.
# seq_len() makes the loop a no-op for an empty lookup table, whereas
# 1:nrow() would iterate over 1 and 0.
likert_items <- c("L1", "L2", "L3", "M1", "M2", "M3", "A1", "A2", "A3")
for (i in seq_len(nrow(rename_likert))) {
  item_mask <- df_uod[likert_items] == rename_likert[i, 1]
  df_uod[likert_items][item_mask] <- rename_likert[i, 2]
}

# Transform text into numeric for Likert variables
# lapply() always returns one vector per column; sapply()'s return shape
# depends on the number of rows, which makes the assignment fragile.
df_uod[likert_items] <- lapply(df_uod[likert_items], as.numeric)

# Cronbach's Alpha -------------------------------------------------------
# One three-item data frame per scale, each passed to cronbach.alpha().
liberal <- df_uod[c("L1", "L2", "L3")]
majority <- df_uod[c("M1", "M2", "M3")]
authoritarian <- df_uod[c("A1", "A2", "A3")]

cronbach.alpha(liberal)
cronbach.alpha(majority)
cronbach.alpha(authoritarian)

# Measurement Model ------------------------------------------------------
# Three correlated latent factors (lib, maj, auth), three indicators each,
# written in lavaan model syntax.
cfaord <- '
lib =~ L1 + L2 + L3
maj =~ M1 + M2 + M3
auth =~ A1 + A2 + A3

lib ~~ maj
lib ~~ auth
maj ~~ auth
'

# ordered = TRUE: all indicators are treated as ordinal.
fit_ord <- cfa(model = cfaord, data = df_uod, ordered = TRUE)
summary(fit_ord, fit.measures = TRUE)
fitmeasures(fit_ord, fit.measures = c("cfi", "rmsea", "chisq"))

# Keep the fitted model on disk.
save(fit_ord, file = "measurement_model.RData")

# Factor scores are predicted for the rows of df_uod and bound column-wise
# to the respondent id taken from the same data frame.
factor_scores <- as.data.frame(lavPredict(fit_ord, newdata = df_uod))
df_uod_factors <- cbind(df_uod["ResponseId"], factor_scores)

# Merge Data -------------------------------------------------------------
# Main analysis file: conjoint profiles + factor scores + covariates, kept
# only for respondents who passed at least one attention check.
# `party_overlap` is "Yes" when the profile's party equals the respondent's
# preferred party; an undetermined comparison (NA on either side) is "No".
df_full_cjoint <- df_cjoint_recoded %>%
  left_join(df_uod_factors, by = "ResponseId") %>%
  left_join(df_covariates, by = "ResponseId") %>%
  filter(check_1_pass == 1 | check_2_pass == 1) %>%
  mutate(
    party_overlap = as.factor(if_else(
      attr_party == party_preference_attr_corr,
      "Yes", "No",
      missing = "No"
    ))
  )

# Same conjoint data with covariates but without the attention filter
# (and without factor scores or party_overlap).
df_full_cjoint_w_inattentive <- df_cjoint_recoded %>%
  left_join(df_covariates, by = "ResponseId")

# Factor scores with covariates attached, one row per row of df_uod_factors.
df_full_uod <- df_uod_factors %>%
  left_join(df_covariates, by = "ResponseId")

# Save Scores ------------------------------------------------------------
# Write each result to its .rds file in the working directory; objects and
# paths are paired by position.
invisible(Map(
  saveRDS,
  list(
    df_full_cjoint,
    df_full_cjoint_w_inattentive,
    df_full_uod,
    df_uod
  ),
  c(
    "df_full_cjoint.rds",
    "df_full_cjoint_w_inattentive.rds",
    "df_full_uod.rds",
    "df_uod_items.rds"
  )
))